﻿/*=====================================================================
  File:      NLPlib.cs

  Summary:   part of speech tagger

---------------------------------------------------------------------

  Copyright (C) Mark Watson.  All rights reserved.

THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
PARTICULAR PURPOSE.
=====================================================================*/


using System;
using System.Text;
using System.IO;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Formatters.Binary;
using System.Collections;
using System.Text.RegularExpressions;
using System.Collections.Generic;

namespace WordNetTools
{
    /// <summary>
    /// Lexicon-driven part-of-speech tagger. Every word is first given the tag
    /// stored for it in a lexicon deserialized from "lex.dat" (or "NN" when the
    /// word is unknown), and a fixed sequence of transformational rules then
    /// corrects the initial tags.
    /// </summary>
    public class NLPlib
    {
        // Lexicon shared by all instances; stays null until a load has fully succeeded.
        private static Hashtable lexHash = null;

        // One run of non-whitespace characters is one token.
        private static readonly Regex tokenRegex = new Regex(@"\S+");

        // Characters removed by tokenize when they are the last character of a token.
        private const string TrailingPunctuation = ";,?):.";

        /// <summary>
        /// Creates a tagger, loading the shared lexicon from "lex.dat" (resolved
        /// against the current directory) the first time an instance is created.
        /// Later instances reuse the already loaded lexicon.
        /// </summary>
        /// <exception cref="SerializationException">
        /// "lex.dat" does not deserialize to a <see cref="Hashtable"/>.
        /// </exception>
        public NLPlib()
        {
            if (lexHash != null) return; // already loaded by an earlier instance

            Hashtable loaded;
            // Read-only access is sufficient and also works for a write-protected file.
            using (Stream file = File.OpenRead("lex.dat"))
            {
                // NOTE(review): BinaryFormatter can execute arbitrary code while
                // deserializing hostile input. lex.dat must come from a trusted
                // source; consider migrating the lexicon to a plain data format.
                IFormatter formatter = new BinaryFormatter();
                loaded = formatter.Deserialize(file) as Hashtable;
            }
            if (loaded == null)
            {
                throw new SerializationException(
                    "lex.dat does not contain a serialized Hashtable.");
            }

            // Publish only after a complete, successful load so that a failed
            // attempt cannot leave an empty lexicon behind for later instances.
            lexHash = loaded;
            Console.WriteLine("Initialized lexHash from serialized data.");
        }

        /// <summary>
        /// Splits <paramref name="s"/> on whitespace and removes one trailing
        /// ';', ',', '?', ')', ':' or '.' from each token.
        /// </summary>
        /// <param name="s">Text to split; null or empty yields an empty list.</param>
        /// <returns>
        /// The tokens, as strings, in input order. A token that consists only of
        /// one of the stripped characters is returned as an empty string.
        /// </returns>
        public ArrayList tokenize(string s)
        {
            ArrayList tokens = new ArrayList();
            if (string.IsNullOrEmpty(s)) return tokens;

            // Matching \S+ alone (no mandatory trailing whitespace) keeps the
            // final word even when the text does not end in whitespace.
            foreach (Match match in tokenRegex.Matches(s))
            {
                string token = match.Value;
                if (TrailingPunctuation.IndexOf(token[token.Length - 1]) >= 0)
                {
                    token = token.Substring(0, token.Length - 1);
                }
                tokens.Add(token);
            }
            return tokens;
        }

        /// <summary>
        /// Tags a list of words with part-of-speech tags.
        /// </summary>
        /// <param name="words">Words to tag; every element must be a string.</param>
        /// <returns>
        /// A list of tag strings with the same length and order as
        /// <paramref name="words"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
        public ArrayList tag(ArrayList words)
        {
            if (words == null) throw new ArgumentNullException("words");

            string[] tokens = new string[words.Count];
            for (int i = 0; i < tokens.Length; i++)
            {
                tokens[i] = (string)words[i];
            }
            return tagTokens(tokens);
        }

        /// <summary>
        /// Tags a list of analysed words, using the <c>Word</c> member of each
        /// element as the text to tag.
        /// </summary>
        /// <param name="words">Analysed words to tag.</param>
        /// <returns>
        /// A list of tag strings with the same length and order as
        /// <paramref name="words"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
        public ArrayList tag(List<AnalysedWord> words)
        {
            if (words == null) throw new ArgumentNullException("words");

            string[] tokens = new string[words.Count];
            for (int i = 0; i < tokens.Length; i++)
            {
                tokens[i] = (string)words[i].Word;
            }
            return tagTokens(tokens);
        }

        // Shared implementation of both tag overloads: lexicon lookup for every
        // token, followed by the transformational rules.
        private static ArrayList tagTokens(string[] tokens)
        {
            ArrayList ret = new ArrayList(tokens.Length);
            for (int i = 0; i < tokens.Length; i++)
            {
                ret.Add(lookupTag(tokens[i]));
            }

            // Apply the transformational rules left to right. The rules are
            // order dependent: each one sees the tag produced by the rules
            // before it, and rule 1 sees the final tag of the previous word.
            for (int i = 0; i < tokens.Length; i++)
            {
                string word = tokens[i];
                string tag = (string)ret[i];

                // rule 1: DT, {VBD | VBP | VB} --> DT, NN
                if (i > 0 && "DT".Equals(ret[i - 1])
                    && (tag == "VBD" || tag == "VBP" || tag == "VB"))
                {
                    tag = "NN";
                }
                // rule 2: convert a noun to a number (CD) if "." appears in the word
                if (tag.StartsWith("N", StringComparison.Ordinal) && word.IndexOf('.') > -1)
                {
                    tag = "CD";
                }
                // rule 3: convert a noun to a past participle if the word ends with "ed"
                if (tag.StartsWith("N", StringComparison.Ordinal)
                    && word.EndsWith("ed", StringComparison.Ordinal))
                {
                    tag = "VBN";
                }
                // rule 4: convert any type to adverb if the word ends in "ly"
                if (word.EndsWith("ly", StringComparison.Ordinal))
                {
                    tag = "RB";
                }
                // rule 5: convert a common noun (NN or NNS) to an adjective if the word ends with "al"
                if (tag.StartsWith("NN", StringComparison.Ordinal)
                    && word.EndsWith("al", StringComparison.Ordinal))
                {
                    tag = "JJ";
                }
                // rule 6: convert a noun to a verb if the preceding word is "would"
                if (i > 0
                    && tag.StartsWith("NN", StringComparison.Ordinal)
                    && string.Equals(tokens[i - 1], "would", StringComparison.OrdinalIgnoreCase))
                {
                    tag = "VB";
                }
                // rule 7: if a word has been categorized as a common noun and it ends with "s",
                //         then set its type to plural common noun (NNS)
                if (tag == "NN" && word.EndsWith("s", StringComparison.Ordinal))
                {
                    tag = "NNS";
                }
                // rule 8: convert a common noun to a present participle verb (i.e., a gerund)
                if (tag.StartsWith("NN", StringComparison.Ordinal)
                    && word.EndsWith("ing", StringComparison.Ordinal))
                {
                    tag = "VBG";
                }

                ret[i] = tag;
            }
            return ret;
        }

        // Returns the initial tag for a word: the lexicon entry up to its first
        // space (or the whole entry when it has no space), or "NN" when the
        // lexicon has no entry for the word.
        private static string lookupTag(string word)
        {
            string entry = (string)lexHash[word];
            // 1/22/2002 mod (from Lisp code): if not in hash, try lower case.
            // Invariant casing keeps the lookup independent of the current culture.
            if (entry == null)
            {
                entry = (string)lexHash[word.ToLowerInvariant()];
            }
            if (entry == null) return "NN"; // default

            int space = entry.IndexOf(' ');
            if (space > -1) return entry.Substring(0, space).Trim();
            return entry;
        }

        //public static void Main(String[] args)
        //{
        //    NLPlib tagger = new NLPlib();
        //    string s = "The dog's paw was bit. We blame the cat; is that fair? ";
        //    ArrayList v = tagger.tokenize(s);
        //    ArrayList t = tagger.tag(v);
        //    for (int i = 0; i < v.Count; i++)
        //    {
        //        Console.WriteLine((string)v[i] + "/" + (string)t[i]);
        //    }
        //}
    }
}
